Load Libraries

# install.packages('knitr', repos = c('http://rforge.net', 'http://cran.rstudio.org'), type = 'source')
# install.packages("ggplot2")
# install.packages("gganimate")
# install.packages("gifski")
# install.packages("png")
# install.packages("transformr")
library("knitr")
library("ggplot2")
## Warning: package 'ggplot2' was built under R version 3.4.4
library("gganimate")
## Warning: package 'gganimate' was built under R version 3.4.4
library("gifski")
## Warning: package 'gifski' was built under R version 3.4.4
library("png")
## Warning: package 'png' was built under R version 3.4.4
library("transformr")
## Warning: package 'transformr' was built under R version 3.4.4

Dancing Landscape

Sources of Dancing Landscapes

  • Complex behavior
    • When factors that create landscapes are interdependent
    • When factors that create landscapes change over time

Examples of Dancing Landscapes

  • The stock market
  • Customer demand by product tag over time

Demonstration

f2D <- function(x, t) {
  return (cos(x) * sin(t * pi) - (abs(x)/10) + x/20)
}

par(mfrow=c(3, 3))
plot(f2D(seq(-10, 10, 0.1), 0.00), type="l")
plot(f2D(seq(-10, 10, 0.1), 0.17), type="l")
plot(f2D(seq(-10, 10, 0.1), 0.33), type="l")
plot(f2D(seq(-10, 10, 0.1), 0.50), type="l")
plot(f2D(seq(-10, 10, 0.1), 0.67), type="l")
plot(f2D(seq(-10, 10, 0.1), 0.83), type="l")
plot(f2D(seq(-10, 10, 0.1), 1.00), type="l")
plot(f2D(seq(-10, 10, 0.1), 1.17), type="l")
plot(f2D(seq(-10, 10, 0.1), 1.33), type="l")

par(mfrow=c(1, 1))

Assemble Sample dataframe

constructData2D <- function (func, xMin, xMax, precision, timeMin, timeMax)
{
  # Interval is bounded by gganimate - this just works, do not ask questions
  timeInterval = ((timeMax - timeMin) / 50) + 0.001
  
  data <- data.frame(X=0, Y=0, Time=0)
  i = 1
  for (t in seq(timeMin, timeMax, timeInterval))
  {
    for (x in seq(xMin, xMax, precision))
    {
      data[i,]$X = x
      data[i,]$Y = func(x, t)
      data[i,]$Time = t
      i = i + 1
    }
  }
  
  return (data)
}

timeData = constructData2D(f2D, -10, 10, 0.2, 0, 5)

## Output the dataframe and the graph it represents
str(timeData)
## 'data.frame':    5050 obs. of  3 variables:
##  $ X   : num  -10 -9.8 -9.6 -9.4 -9.2 -9 -8.8 -8.6 -8.4 -8.2 ...
##  $ Y   : num  -1.5 -1.47 -1.44 -1.41 -1.38 -1.35 -1.32 -1.29 -1.26 -1.23 ...
##  $ Time: num  0 0 0 0 0 0 0 0 0 0 ...
ggplot(timeData, aes(X, Y, group=Time)) +
  geom_path()

Animation of function over time

visualizeFunction <- function(dataframe)
{
  if (is.null(dataframe$OptimalX))
  {
    ggplot(dataframe, aes(X, Y, group=Time)) +
      geom_path() +
      transition_states(Time, transition_length=1, state_length=1, wrap=F) +
      labs(title = "Time: {closest_state}") +
      enter_fade() + 
      exit_fade()
  }
  else
  {
    ggplot(dataframe, aes(X, Y, group=Time)) +
      geom_path() +
      geom_point(x=dataframe$OptimalX, y=dataframe$OptimalY, colour="hotpink", size=4) +
      transition_states(Time, transition_length=1, state_length=1, wrap=F) +
      labs(title = "Time: {closest_state}") +
      enter_fade() + 
      exit_fade()
  }
}

visualizeFunction(timeData)

Setup Hill Climbing

The function updates the OptimalX and OptimalY columns of a dataframe containing X, Y, and Time to include the best results from hill climbing optimization function. It attempts to find the maxima.

ticksPerUnitTime controls how fast the hill climbing runs compared to the function. For every increase in the time value of 1.0, the hill climbing will update its value ticksPerUnitTime times.

costFunction is the function being optimized, of the form y = costFunction(x, time).

At time = 0, the algorithm will use a single random guess. That is, do not expect the first value to be a good one. Setting startX can control this instead of a random value.

windowSize controls how far the hill climb algorithm can guess for each guess. It will look up to (windowSize / 2) left or right from its current best

hillClimb <- function(timeDataFrame, ticksPerUnitTime, costFunction, startX = NULL, windowSize = 1)
{
  # Setup variables
  currentTime = min(timeDataFrame$Time)
  currentTick = 1
  currentSubset = timeDataFrame[timeDataFrame$Time == currentTime,]
  bestX = startX
  if (is.null(bestX))
  {
    bestX = runif(1, min(currentSubset$X), max(currentSubset$X))
  }
  bestY = costFunction(bestX, currentTime)
  width = windowSize / 2

  # Record optimal x and y
  timeDataFrame[timeDataFrame$Time == currentTime,]$OptimalX = rep(bestX, nrow(currentSubset))
  timeDataFrame[timeDataFrame$Time == currentTime,]$OptimalY = rep(bestY, nrow(currentSubset))

  # Iterate through time
  nextSubset = timeDataFrame[timeDataFrame$Time > currentTime,]
  while (nrow(nextSubset) > 0)
  {
    # Calculate number of hill climb ticks (guesses) to do
    nextTime = min(nextSubset$Time)
    iterations = floor(nextTime * ticksPerUnitTime) - currentTick
    
    # Update variables
    currentTime = nextTime
    currentSubset = timeDataFrame[timeDataFrame$Time == currentTime,]
    getOption <- function() { 
      return (runif(1, 
                    max(bestX - width, min(currentSubset$X)), 
                    min(bestX + width, max(currentSubset$X))))
    }
    bestY = costFunction(bestX, currentTime)

    # Actually do hill climbing
    if (iterations >= 1)
    for (i in 1:iterations)
    {
      currentTick = currentTick + 1
      testX = getOption()
      testY = costFunction(testX, currentTime)
      if (testY > bestY)
      {
        bestX = testX
        bestY = testY
      }
    }
    
    # Record new optimal x and y
    timeDataFrame$OptimalX[timeDataFrame$Time == currentTime] = rep(bestX, nrow(currentSubset))
    timeDataFrame$OptimalY[timeDataFrame$Time == currentTime] = rep(bestY, nrow(currentSubset))
    
    # Load next time subset
    nextSubset = timeDataFrame[timeDataFrame$Time > currentTime,]
  }
  
  return (timeDataFrame)
}

Run and Visualize Hill Climbing

# Approximately 1 guess per tick
timeData = hillClimb(timeData, 9, f2D, startX = -10)
## Warning in `[<-.data.frame`(`*tmp*`, timeDataFrame$Time == currentTime, :
## provided 4 variables to replace 3 variables

## Warning in `[<-.data.frame`(`*tmp*`, timeDataFrame$Time == currentTime, :
## provided 4 variables to replace 3 variables
visualizeFunction(timeData)
## Warning: Removed 101 rows containing missing values (geom_point).
## Warning: Removed 101 rows containing missing values (geom_point).

## Warning: Removed 101 rows containing missing values (geom_point).
# Approximately 0.3 guesses per tick
timeData = hillClimb(timeData, 3, f2D, startX = -10)
visualizeFunction(timeData)

# Approximately 3 guesses per tick
timeData = hillClimb(timeData, 28, f2D, startX = -10)
visualizeFunction(timeData)

Takeaways

NetLogo in R

This next section sets up an agent-based model that exhibits complexity. We will use it to for the following workshop, where you will attempt to optimize over the output from the model.

The model is a square grid of cells, wrapping on both axes, with one agent per cell. Each agent has a value that it can adjust up or down. An agent wants to have a higher value than three of its neighbors, but does not want to be the highest. Agents will:

Setup Utility Functions

# From https://stackoverflow.com/questions/2453326/fastest-way-to-find-second-third-highest-lowest-value-in-vector-or-column
maxN <- function(x, N=2){
  len <- length(x)
  if(N>len){
    warning('N greater than length(x).  Setting N=length(x)')
    N <- length(x)
  }
  sort(x,partial=len-N+1)[len-N+1]
}
# End sourced code

getAgentChange <- function(agentValue, neighborValues)
{
  neighborValues = c(neighborValues, agentValue)
  if (agentValue >= max(neighborValues))
    return (-3)
  if (agentValue < mean(neighborValues))
    return (1)
  if (agentValue >= mean(neighborValues) && agentValue < maxN(neighborValues, 2))
    return (1)
  return (0)
}

Setup grid

buildWorld <- function(height, width, startingValues = NULL)
{
  if (is.null(startingValues))
  {
    startingValues = rep(100, height * width)
  }
  densityValue = 1 / (height * width)
  sumStartValues = sum(startingValues)
  world <- data.frame(X=0, Y=0, Value=startingValues[1], Density = densityValue)
  i = 1
  for (x in 1:width)
  {
    for (y in 1:height)
    {
      world[i,]$X = x
      world[i,]$Y = y
      world[i,]$Value = startingValues[i]
      world[i,]$Density = startingValues[i] / sumStartValues
      i = i + 1
    }
  }
  
  return (world)
}

world = buildWorld(17, 17, seq(100, 128.9, 0.1))

ggplot(world, aes(X, Y, z = Density)) +
  geom_raster(aes(fill = Density)) +
  geom_contour(colour = "white")

Setup Changing Values

updateWorld <- function(world, updateFunction, orthogonalOnly = T)
{
  newWorld = buildWorld(max(world$X), max(world$Y))
  for (x in 1:max(world$X))
  {
    xLower = x - 1
    if (xLower == 0) xLower = max(world$X)
    xUpper = x + 1
    if (xUpper > max(world$X)) xUpper = 1
    
    for (y in 1:max(world$Y))
    {
      yLower = y - 1
      if (yLower == 0) yLower = max(world$Y)
      yUpper = y + 1
      if (yUpper > max(world$Y)) yUpper = 1
      
      neighborValues = c()
      neighborValues = c(neighborValues, world[world$X == xUpper & world$Y == y,]$Value)
      neighborValues = c(neighborValues, world[world$X == xLower & world$Y == y,]$Value)
      neighborValues = c(neighborValues, world[world$X == x & world$Y == yUpper,]$Value)
      neighborValues = c(neighborValues, world[world$X == x & world$Y == yLower,]$Value)

      
      if (!orthogonalOnly)
      {
        neighborValues = c(neighborValues, world[world$X == xUpper & world$Y == yUpper,]$Value)
        neighborValues = c(neighborValues, world[world$X == xUpper & world$Y == yLower,]$Value)
        neighborValues = c(neighborValues, world[world$X == xLower & world$Y == yUpper,]$Value)
        neighborValues = c(neighborValues, world[world$X == xLower & world$Y == yLower,]$Value)
      }

      currentValue = world[newWorld$X == x & newWorld$Y == y,]$Value
      newWorld[newWorld$X == x & newWorld$Y == y,]$Value = currentValue + updateFunction(currentValue, neighborValues)
    }
  }
  
  newWorld$Density = newWorld$Value / sum(newWorld$Value)
  
  return (newWorld)
}

world2 = updateWorld(world, getAgentChange)
ggplot(world2, aes(X, Y, z = Density)) +
  geom_raster(aes(fill = Density)) +
  geom_contour(colour = "white")

Construct time data

# Assembles the base data - what elevation is each coordinate at for each time step
buildTimeWorld <- function(width, height, iterations)
{
  lastStep = buildWorld(width, height, seq(100, 100+(height * width * 0.1), 0.1))
  lastStep$Time = rep(1, nrow(lastStep))
  
  totalData = lastStep
  
  for (t in 2:iterations)
  {
    newData = updateWorld(lastStep, getAgentChange)
    newData$Time = rep(t, nrow(newData))
    lastStep = newData
    
    totalData = rbind(totalData, newData)
  }

  return(totalData)
}

# Given a set of points over time, outputs the optimal point at each time step
getOptimalData <- function(timeWorld)
{
  optimalData = data.frame(OptimalX = 0, OptimalY = 0, Time = 0)
  i = 1
  for (t in min(timeWorld$Time):max(timeWorld$Time))
  {
    timeSubset = timeWorld[timeWorld$Time == t,]
    optimalData[i,]$OptimalX = timeSubset[timeSubset$Value == max(timeSubset$Value),][1,]$X
    optimalData[i,]$OptimalY = timeSubset[timeSubset$Value == max(timeSubset$Value),][1,]$Y
    optimalData[i,]$Time = t
    i = i + 1
  }
  return (optimalData)
}

# Given a set of points over time and an optimization function, returns the optimization function's
# guess of optimality for each time step
getGuesses <- function(timeWorld, optimizeFunction, ticksPerTime = 1, ...)
{
  guessData = data.frame(GuessedX = 0, GuessedY = 0, Time = 0)
  xGuess = floor(mean(timeWorld$X))
  yGuess = floor(mean(timeWorld$Y))
  i = 1
  for (t in min(timeWorld$Time):max(timeWorld$Time))
  {
    timeSubset = timeWorld[timeWorld$Time == t,]
    guesses = optimizeFunction(timeSubset, ticksPerTime, xGuess, yGuess, ...)
    xGuess = guesses[1]
    yGuess = guesses[2]
    guessData[i,]$GuessedX = xGuess
    guessData[i,]$GuessedY = yGuess
    guessData[i,]$Time = t
    i = i + 1
  }
  return (guessData)
}

# Horrible ugly monster function to encapsulate a bunch of work.
# Builds a world (or reuses the one passed in a timeWorld). Uses that to obtain optimal points
# (if includeOptimalPoints is true), as well as run an optimization function (if optimizeFunction is not
# null). Returns an object with three dataframes (or NULLs) - worldData, optimalPoints, and guessedPoints.
assembleData <- function (width = 17, height = 17, iterations = 50, timeWorld = NULL, optimizeFunction = NULL, ticksPerTime = 1, includeOptimalPoints = T, ...)
{
  finalData <- list(worldData = NULL, optimalPoints = NULL, guessedPoints = NULL)
  if (is.null(timeWorld))
  {
    finalData$worldData = buildTimeWorld(width, height, iterations)
  } else {
    finalData$worldData = timeWorld
  }
  if (!is.null(optimizeFunction))
  {
    finalData$guessedPoints = getGuesses(finalData$worldData, optimizeFunction, ticksPerTime, ...)
  }
  if (includeOptimalPoints)
  {
    finalData$optimalPoints = getOptimalData(finalData$worldData)
  }
  
  return (finalData)
}
# Assembling a world only
finalData = assembleData(17, 17, 50)

# Save out the world for reuse
timeWorld = finalData$worldData

# See how much faster this is now?
finalData = assembleData(timeWorld = timeWorld)

Prepare animated visualizer

visualizeFunction <- function(allData, fromtime = 25, untiltime = 50)
{
  fromtime = max(min(fromtime, max(allData$worldData$Time)), min(allData$worldData$Time))
  untiltime = max(min(untiltime, max(allData$worldData$Time)), min(allData$worldData$Time))
  
  worldData = allData$worldData[allData$worldData$Time >= fromtime & allData$worldData$Time <= untiltime,]

  animation = ggplot(worldData, aes(X, Y, z = Density)) +
    geom_raster(aes(fill = Density)) +
    # geom_contour(colour = "white", bins = 3) +
    transition_states(Time, transition_length = 10, state_length = 0, wrap=F) 
    labs(title = "Time: {closest_state}") +
    enter_fade() +
    exit_fade() +
    ease_aes("linear")
  if (!is.null(allData$optimalPoints))
  {
    optData = allData$optimalPoints[allData$optimalPoints$Time >= fromtime & allData$optimalPoints$Time <= untiltime,]
    lengthenFactor = nrow(worldData) / nrow(optData)
    final = optData
    for(i in 2:lengthenFactor) final = rbind(final, optData)
    optData = final
    optData = optData[with(optData, order(Time)), ]

    animation = animation + geom_point(x=optData$OptimalX, y=optData$OptimalY, colour="hotpink", size=4)
  }
  if (!is.null(allData$guessedPoints))
  {
    guessData = allData$guessedPoints[allData$guessedPoints$Time >= fromtime & allData$guessedPoints$Time <= untiltime,]
    lengthenFactor = nrow(worldData) / nrow(guessData)
    final = guessData
    for(i in 2:lengthenFactor) final = rbind(final, guessData)
    guessData = final
    guessData = guessData[with(guessData, order(Time)), ]

    animation = animation + geom_point(x=guessData$GuessedX, y=guessData$GuessedY, colour="lawngreen", size=4)
  }
  animation
}

Visualize

visualizeFunction(finalData)

Setup Optimization Library

All functions have a standardized signature:

Evaluation

evaluate <- function (allData, fromtime = 25, untiltime = 50)
{
  if (is.null(allData$optimalPoints) || is.null(allData$guessedPoints))
  {
    stop("Incomplete data for evaluate; allData is missing either optimalPoints or guessedPoints")
  }
  expected = allData$optimalPoints
  actual = allData$guessedPoints
  
  fromtime = max(min(fromtime, max(expected$Time)), min(expected$Time))
  untiltime = max(min(untiltime, max(expected$Time)), min(expected$Time))
  
  averageDistance = 0
  for (t in fromtime:untiltime)
  {
    expectedCoords = expected[expected$Time == t,]
    actualCoords = actual[actual$Time == t,]
    distance = sqrt((expectedCoords$OptimalX - actualCoords$GuessedX) ^ 2 + (expectedCoords$OptimalY - actualCoords$GuessedY) ^ 2)
    averageDistance = averageDistance + distance
  }
  averageDistance = averageDistance / (untiltime - fromtime + 1)
  return (averageDistance)
}

Random

randomGuesses <- function(currentData, ticks, startX = 1, startY = 1)
{
  bestX = round(runif(1, min = min(currentData$X), max = max(currentData$X)))
  bestY = round(runif(1, min = min(currentData$Y), max = max(currentData$Y)))
  return (c(bestX, bestY))
}

Hill Climbing

hillClimb <- function(currentData, ticks, startX = 1, startY = 1)
{
  bestX = startX
  bestY = startY
  bestValue = currentData[currentData$X == bestX & currentData$Y == bestY,]$Value
  for (i in 1:ticks)
  {
    xLower = bestX - 1
    if (xLower == 0) xLower = max(currentData$X)
    xUpper = bestX + 1
    if (xUpper > max(currentData$X)) xUpper = 1
    yLower = bestY - 1
    if (yLower == 0) yLower = max(currentData$Y)
    yUpper = bestY + 1
    if (yUpper > max(currentData$Y)) yUpper = 1
    
    if (currentData[currentData$X == xLower & currentData$Y == bestY,]$Value > bestValue)
    {
      bestX = xLower
      bestValue = currentData[currentData$X == xLower & currentData$Y == bestY,]$Value
    }
    
    if (currentData[currentData$X == xUpper & currentData$Y == bestY,]$Value > bestValue)
    {
      bestX = xUpper
      bestValue = currentData[currentData$X == xUpper & currentData$Y == bestY,]$Value
    }
    
    if (currentData[currentData$X == bestX & currentData$Y == yUpper,]$Value > bestValue)
    {
      bestY = yUpper
      bestValue = currentData[currentData$X == bestX & currentData$Y == yUpper,]$Value
    }
    
    if (currentData[currentData$X == bestX & currentData$Y == yLower,]$Value > bestValue)
    {
      bestY = yLower
      bestValue = currentData[currentData$X == bestX & currentData$Y == yLower,]$Value
    }
  }
  
  return (c(bestX, bestY))
}

Simulated Annealing

Accepts two additional parameters:

  • coolingFunction - function that returns a percent likelihood to swap to a worse position.
  • temperatureTickAmount - each time the cooling function is called, its input increments by this amount. Determines how fast to slide along the cooling function.
linearCoolingFunction <- function (x) { return(1 - 0.1 * x) }
powerCoolingFunction <- function (x) { return(0.75 ^ x) }
decayCoolingFunction <- function (x) { return(1 / (x + 1)) }

simulatedAnnealing <- function(currentData, ticks, startX = 1, startY = 1, coolingFunction = NULL, temperatureTickAmount = NULL)
{
  if (is.null(coolingFunction))
  {
    stop("Simulated Annealing requires CoolingFunction!")
  }
  if (is.null(temperatureTickAmount))
  {
    stop("Simulated Annealing requires TemperatureTickAmount!")
  }
  
  bestX = startX
  bestY = startY
  bestValue = currentData[currentData$X == bestX & currentData$Y == bestY,]$Value
  currentCoolingValue = 0
  for (i in 1:ticks)
  {
    xLower = bestX - 1
    if (xLower == 0) xLower = max(currentData$X)
    xUpper = bestX + 1
    if (xUpper > max(currentData$X)) xUpper = 1
    yLower = bestY - 1
    if (yLower == 0) yLower = max(currentData$Y)
    yUpper = bestY + 1
    if (yUpper > max(currentData$Y)) yUpper = 1
    
    nextX = bestX
    nextY = bestY
    nextValue = 0
    
    if (currentData[currentData$X == xLower & currentData$Y == bestY,]$Value > nextValue)
    {
      nextX = xLower
      nextValue = currentData[currentData$X == xLower & currentData$Y == bestY,]$Value
    }
    
    if (currentData[currentData$X == xUpper & currentData$Y == bestY,]$Value > nextValue)
    {
      nextX = xUpper
      nextValue = currentData[currentData$X == xUpper & currentData$Y == bestY,]$Value
    }
    
    if (currentData[currentData$X == bestX & currentData$Y == yUpper,]$Value > nextValue)
    {
      nextY = yUpper
      nextValue = currentData[currentData$X == bestX & currentData$Y == yUpper,]$Value
    }
    
    if (currentData[currentData$X == bestX & currentData$Y == yLower,]$Value > nextValue)
    {
      nextY = yLower
      nextValue = currentData[currentData$X == bestX & currentData$Y == yLower,]$Value
    }
    
    # The part that makes this simulated annealing, not hill climbing
    currentCoolingValue = currentCoolingValue + temperatureTickAmount
    if ((nextValue < bestValue && runif(1) < coolingFunction(currentCoolingValue)) ||
        nextValue > bestValue)
    {
      bestValue = nextValue
      bestX = nextX
      bestY = nextY
    }
  }
  
  return (c(bestX, bestY))
}

Workshop

randomOutput = assembleData(timeWorld = timeWorld, optimizeFunction = randomGuesses, ticksPerTime = 1)
cat("Random guesses were, on average, off by", evaluate(randomOutput), "units.\n")
## Random guesses were, on average, off by 8.588169 units.
visualizeFunction(randomOutput)

hillClimbOutput = assembleData(timeWorld = timeWorld, optimizeFunction = hillClimb, ticksPerTime = 10)
cat("Hill climbing was, on average, off by", evaluate(hillClimbOutput), "units.\n")
## Hill climbing was, on average, off by 6.260702 units.
visualizeFunction(hillClimbOutput)

saOutput = assembleData(timeWorld = timeWorld, optimizeFunction = simulatedAnnealing, ticksPerTime = 100, coolingFunction = decayCoolingFunction, temperatureTickAmount = 0.05)
cat("Simulated Annealing was, on average, off by", evaluate(saOutput), "units.\n")
## Simulated Annealing was, on average, off by 6.019548 units.
visualizeFunction(saOutput)